## ---- gender-attribution ----

# Load the name dictionary. The CSV has no header row, so read.csv() names the
# columns V1, V2, ...; exact duplicate rows are dropped straight away.
italian_names_by_gender <- unique(
  read.csv("replication_file_11_italian_names_by_gender.csv", header = FALSE)
)
# Discard every row whose V1 value still occurs more than once. After unique()
# such rows must differ in another column (presumably the same name listed
# under more than one gender), so the name cannot be attributed unambiguously.
italian_names_by_gender <- italian_names_by_gender[
  !(italian_names_by_gender$V1 %in%
      italian_names_by_gender$V1[duplicated(italian_names_by_gender$V1)]), ]

# Read an entire table from an SQLite database file into a data frame.
#
# Args:
#   database: path to the SQLite database file.
#   table:    name of the table to read. It is pasted into the SQL text
#             verbatim, so it must come from trusted code, never user input.
#
# Returns:
#   A data frame holding every row and column of `table`.
sqliteGetTable <- function(database, table) {
  # library() instead of require(): a missing package now stops here with a
  # clear error rather than returning FALSE and failing later. The packages
  # stay attached after the call, exactly as they did before.
  library(DBI)
  library(RSQLite)
  con <- DBI::dbConnect(RSQLite::SQLite(), dbname = database)
  # Always release the connection, even when the query below fails.
  on.exit(DBI::dbDisconnect(con), add = TRUE)
  # dbGetQuery() sends the statement, fetches all rows and clears the result
  # set in one step (and cleans up the result set on error).
  DBI::dbGetQuery(con, paste0("SELECT * FROM ", table, ";"))
}
# Pull the complete `member` table out of meetup_aug14.sqlite.
meetup_members_2014 <- sqliteGetTable(database = "meetup_aug14.sqlite",
                                      table = "member")

# Functions
# Upper-case the first character of every space-separated word in a single
# string; the remaining characters of each word are left untouched.
# Only the first element of `string` is processed.
simpleCap <- function(string) {
  words <- strsplit(string, " ")[[1]]
  initials <- toupper(substring(words, 1, 1))
  remainders <- substring(words, 2)
  paste(paste0(initials, remainders), collapse = " ")
}

# Find the first space-separated token of `name` that appears in the first
# column of `name_dictionary`. The comparison is done in lower case.
#
# Returns the matching token (lower-cased), or NA when no token is found.
firstnamesFirst <- function (name, name_dictionary) {
  known_names <- tolower(as.character(name_dictionary[[1]]))
  tokens <- unlist(strsplit(tolower(as.character(name)), " "))

  # Keep the tokens that are dictionary names, preserving their order.
  hits <- tokens[tokens %in% known_names]
  if (length(hits) > 0) {
    return(hits[[1]])
  }
  NA
}

# Attribute a gender to a member name using a name/gender dictionary.
#
# Args:
#   string:     the member's name as free text; its space-separated tokens are
#               searched for a known first name, in order.
#   dictionary: data frame whose first column holds first names and whose
#               second column holds the matching gender label.
#
# Returns:
#   The gender label as a character string, or NA_character_ when no token of
#   `string` is a dictionary name (or the capitalised token has no exact match
#   in the dictionary's first column).
genderAttribution <- function(string, dictionary) {
  # library() instead of require(): fail immediately if stringr is missing.
  library(stringr)

  # Pick the first token of the name that is a known first name (lower case).
  first_name <- firstnamesFirst(string, dictionary)

  # No known first name: stop here. Previously the NA was pushed through
  # simpleCap(), turned into the literal string "NANA" and looked up in the
  # dictionary, which only yielded NA by accident.
  if (is.na(first_name)) {
    return(NA_character_)
  }

  # Keep the first word only (the token is already a single word; this is a
  # safeguard kept from the original pipeline).
  first_name <- stringr::word(first_name, 1)

  # Replace hyphens with spaces (e.g. "anna-maria" -> "anna maria").
  first_name <- gsub("-", " ", first_name)

  # Capitalise the first letter of each word to match the dictionary's case.
  first_name <- simpleCap(first_name)

  # Look up the gender recorded for the matched name.
  as.character(dictionary[[2]][match(first_name, dictionary[[1]])])
}

# Attribute a gender to every member name in parallel, then save the result.
library(parallel)

# Leave one core free, but always start at least one worker: detectCores()
# returns NA when the count is unknown, and "cores - 1" is 0 on a single-core
# machine; makeCluster() fails in both cases.
no_cores <- max(1L, detectCores() - 1L, na.rm = TRUE)
cl <- makeCluster(no_cores)

assigned_gender_vec <- tryCatch({
  # Workers start with an empty workspace: export every function defined in
  # this environment so genderAttribution() and its helpers are available.
  X <- lsf.str()
  clusterExport(cl = cl, varlist = c(as.vector(X)), envir = environment())

  unlist(parLapply(cl, meetup_members_2014$name, genderAttribution,
                   italian_names_by_gender))
}, finally = stopCluster(cl))  # shut the workers down even if a task fails

# One row per member: id plus attributed gender (NA when none was found).
meetup_members_2014_gender <-
  data.frame(member_id = meetup_members_2014$member_id,
             gender = assigned_gender_vec, stringsAsFactors = FALSE)
save(meetup_members_2014_gender,
     file = 'replication_file_12_meetup_members_2014_gender.RData')
